#!/bin/bash


# Show the usage text on stderr and stop successfully when help is requested
# through -h/--help as first argument, or when no argument was given at all.
if [[ "$1" == "-h" || "$1" == "--help" || -z "$*" ]] ; then
   printf '%s\n' \
      "usage  $(basename "$0") [-d <subst_dic.txt>] [-f | -l] <file>.." \
      "  Apply the en->en-Oxford (or <subst_dic.txt> if specified) substitution list to each <file>." \
      "  <subst_dic.txt> is a list of lines in the form '.*->.*'." \
      "  In particular, codespell dictionaries are valid." \
      "  By default, substitutions are only suggested." \
      "  If -f is set, substitutions are silently applied." \
      "  If -l is set, the list of files that require substitutions is listed." \
      >&2
   exit 0
fi

# Option parsing.
#   -d <file>  use <file> as substitution dictionary (stored in DIC)
#   -f         apply the substitutions in place       (FORCE=1)
#   -l         only list the files needing changes    (LIST_FILES=1)
# Options may be repeated and given in any order; parsing stops at the first
# argument that is not one of them, leaving the file names in "$@".

# Default dictionary: the auto-generated list located next to this script.
DIC="$( dirname -- "${BASH_SOURCE[0]}" )"/dictionary_en_to_en-OX_AUTOGENERATED.txt

# Reset the mode flags explicitly: otherwise a FORCE or LIST_FILES variable
# inherited from the caller's environment would switch the mode on without
# the matching option being given (FORCE rewrites files in place).
FORCE=
LIST_FILES=

while true; do
   case "$1" in
      -f)
         FORCE=1
         shift
         ;;
      -l)
         LIST_FILES=1
         shift
         ;;
      -d)
         # -d needs a value; report its absence instead of storing an empty path.
         if [ "$#" -lt 2 ] ; then
            echo "-d expects a dictionary file. Try $(basename "$0") -h" >&2
            exit 1
         fi
         DIC="$2"
         shift 2
         ;;
      *)
         break
         ;;
   esac
done

# Whatever remains after the options must name the files to process:
# complain on stderr and fail when nothing is left.
[[ -n "$*" ]] || {
   printf '%s\n' "args expected. Try $(basename "$0") -h" >&2
   exit 1
}

# match_list <word>...
#
# Print (without trailing newline) a basic regular expression that matches
# any of the given words together with one piece of context on each side:
#   - before: start of line, or one character other than < : . " ', or a
#     '.'/':' preceded by a character that is neither a letter nor ')';
#   - after:  end of line, or one character other than > . " ' (, or a
#     '.'/'(' followed by a non-letter.
# Every lowercase letter that starts a word is turned into a two-case
# bracket ("colour" -> "[cC]olour"); the words are joined with '\|'.
# Relies on GNU sed (\U) and on the default IFS for "$*".
match_list() {
   local alternation
   alternation="$(sed -e 's/\<\([a-z]\)/\[\1\U\1\]/g' -e 's/ /\\|/g' <<< "$*")"
   printf '%s' \
      '\(^\|[^<:."'\'']\|\([^a-zA-Z)][.:]\)\)\<\(' \
      "$alternation" \
      '\)\>\($\|[^>."'\''(]\|\([.(][^a-zA-Z]\)\)'
}

# The dictionary is read several times below; fail early with a clear message
# rather than letting every later step run against an empty word list.
if [ ! -r "$DIC" ] ; then
   echo "cannot read dictionary '$DIC'. Try $(basename "$0") -h" >&2
   exit 1
fi

# WORDS: left-hand side of every 'word->replacement' entry of the dictionary.
# Lines without '->' are not entries and are skipped (-n ... p): a "word" with
# no replacement could be matched in the files but never substituted.
# Reading through process substitution yields an empty array, rather than one
# empty element, when there is nothing to read.
readarray -t WORDS < <(sed -n 's/->.*$//p' "$DIC")

# EXPR: regular expression matching any dictionary word in context.
EXPR="$(match_list "${WORDS[@]}")"

# FILES: the arguments in which grep finds at least one match of EXPR.
readarray -t FILES < <(grep -l "$EXPR" "$@")

# Nothing to do when no file contains a dictionary word. In -l mode nothing
# is printed in that case, so the output never contains an empty entry.
if [ -z "${FILES[*]}" ] ; then
   exit 0
elif [ -n "$LIST_FILES" ] ; then
   # -l mode: report the files needing substitutions, one name per line so
   # that names containing spaces stay unambiguous, then stop.
   printf '%s\n' "${FILES[@]}"
   exit 0
fi

# Reduce WORDS to the words that actually occur in FILES: grep -o prints each
# match of EXPR (the word plus its surrounding context), sed keeps only the
# run of letters, and sort -u removes duplicates.
readarray -t WORDS <<< "$(
   grep -h -o "$EXPR" "${FILES[@]}" \
      | sed 's/^\(.\?[^a-zA-Z]\)\?\([a-zA-Z]*\)[^a-zA-Z]\?.\?$/\2/' \
      | sort -u
)"

# Rebuild the expression from the reduced word list.
EXPR="$(match_list "${WORDS[@]}")"

# SUBDIC: the dictionary entries whose left-hand side is one of WORDS.
# The candidate entries are the dictionary itself followed by a copy of its
# lowercase-initial entries with the first character of both sides upper-cased.
# The selection patterns are one '^word->' per word of WL.
WL="${WORDS[*]}"
SUBDIC="$(
   {
      cat "$DIC"
      grep '^[a-z]' "$DIC" \
         | sed 's/\(.\)\(.*\)->\(.\)\(.*\)/\U\1\E\2->\U\3\E\4/'
   } | grep -f <( printf '%s\n' "^${WL// /->$'\n'^}->" )
)"

# MARK: SGR parameters used by grep to highlight a match (suggest mode).
# E:    the escape character.
# D:    the "ESC[m" marker written for use inside a sed expression.
# DD:   the same marker, escaped once more because it goes through a sed
#       replacement that itself generates sed commands.
MARK="01;31"
E=$'\e'
D="$E\[m"
DD="$E\\\[m"

if [ -n "$FORCE" ] ; then
   # In-place mode. The generated sed script:
   #   1. surrounds every match of EXPR (word and context) with markers,
   #   2. moves the markers so that they enclose the word only,
   #   3. replaces each marked word by its substitution from SUBDIC.
   # printf '%s\n' is used so the generated commands are emitted verbatim,
   # without any backslash-escape interpretation by the shell.
   sed -f <(
      printf '%s\n' "s/\($EXPR\)/$D\1$D/g;"
      printf '%s\n' "s/$D\(.\?[^a-zA-Z]\)\?\([a-zA-Z]*\)\([^a-zA-Z]*\)$D/\1$D\2$D\3/g"
      sed "s/^\(.*\)->\(.*\)/s\/$DD\1$DD\/\2\/g;/" <<< "$SUBDIC"
   ) -i "${FILES[@]}"
else
   # Suggest mode. grep highlights the matches; the sed stages then
   #   1. drop the "ESC[K" sequences emitted by grep,
   #   2. narrow the highlighted span to the word itself,
   #   3. append "->replacement" (coloured) after each highlighted word.
   # The match colour is imposed through GREP_COLORS: the stages below look
   # for MARK, so a different colour set in the user's environment would
   # make them miss every match.
   GREP_COLORS="mt=$MARK" grep --color=always -n "$EXPR" "${FILES[@]}" |
   sed "s/$E\[K//g" |
   sed "s/\($E\[${MARK}m\)\(.\?[^a-zA-Z]\)\?\([a-zA-Z]*\)\([^a-zA-Z]*\)\($E\[m\)/\2\1\3\5\4/g" |
   sed -f <(
      sed "s/^\(.*\)->\(.*\)/s\/\\\($E\\\[${MARK}m\1$E\\\[m\\\)\/\\\1$E\\\[1;33m->$E\\\[0;32m\2$E\\\[m\/g;/" <<< "$SUBDIC"
   )
fi
